In [1]:
import plotly
plotly.offline.init_notebook_mode()
import cufflinks as cf
cf.go_offline()
from plotly.graph_objs import Bar,Layout, Figure,Data,Scattermapbox,Marker,Surface,XAxis,YAxis,ZAxis,Scene,Scatter
In [2]:
import pandas as pd
import numpy as np

Scatter plot using pandas

In [3]:
df = pd.read_csv('http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt', sep='\t')
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 6 columns):
country      1704 non-null object
year         1704 non-null int64
pop          1704 non-null float64
continent    1704 non-null object
lifeExp      1704 non-null float64
gdpPercap    1704 non-null float64
dtypes: float64(3), int64(1), object(2)
memory usage: 80.0+ KB
In [34]:
df2007 = df[df.year==2007]
df1952 = df[df.year==1952]
In [8]:
df2007.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 142 entries, 11 to 1703
Data columns (total 6 columns):
country      142 non-null object
year         142 non-null int64
pop          142 non-null float64
continent    142 non-null object
lifeExp      142 non-null float64
gdpPercap    142 non-null float64
dtypes: float64(3), int64(1), object(2)
memory usage: 7.8+ KB
In [9]:
df1952.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 142 entries, 0 to 1692
Data columns (total 6 columns):
country      142 non-null object
year         142 non-null int64
pop          142 non-null float64
continent    142 non-null object
lifeExp      142 non-null float64
gdpPercap    142 non-null float64
dtypes: float64(3), int64(1), object(2)
memory usage: 7.8+ KB
In [19]:
s1=Scatter(x=df2007.gdpPercap,
          y=df2007.lifeExp,
          mode='markers',
          marker={'color':"#FF7F50"},
          name='2007'
         )
s2=Scatter(x=df1952.gdpPercap,
          y=df1952.lifeExp,
          mode='markers',
          marker={'color':"#4682B4"},
          name='1952'
         )
In [25]:
layout=Layout(title='GDP plot',
              xaxis={'title': 'GDP per Capita','type':'log'},
              yaxis={'title': "Life Expectancy"})
In [26]:
plotly.offline.iplot(Figure(data=[s1,s2],layout=layout))
In [35]:
df2007['loggdpPercap']=df2007['gdpPercap'].map(lambda x:np.log(x))
df1952['loggdpPercap']=df1952['gdpPercap'].map(lambda x:np.log(x))
//home/vmuser/.pyenv/versions/3.5.2/lib/python3.5/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

//home/vmuser/.pyenv/versions/3.5.2/lib/python3.5/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [36]:
df2007.iplot(kind='scatter', mode='markers', x='loggdpPercap', y='lifeExp',color="#FF7F50")
df1952.iplot(kind='scatter', mode='markers', x='loggdpPercap', y='lifeExp',color="#4682B4")